

###############################
## OLS vs 2SLS coefficients
###############################

load("metadata/iv_replicate.rds")
library("latex2exp")
names(d)

iv.notsig <- which(d$AR_bounded == FALSE | d$AR_p > 0.05)


## OLS vs 2SLS coefficients (scatterplot)
pdf("graphs/Figure5a.pdf", height = 7, width = 7)
par(mar = c(4, 4, 1, 1))
coef.iv <- d$iv_coef / d$ols_analy_se
coef.ols <- d$ols_coef / d$ols_analy_se
xlab <- TeX(r'($\hat{\tau}_{OLS} / \hat{SE}(\hat{\tau}_{OLS})$)') 
ylab <- TeX(r'($\hat{\tau}_{2SLS} / \hat{SE}(\hat{\tau}_{OLS})$)')
plot(1,
  type = "n",
  xlab = "",
  ylab = "",
  xlim = c(-40, 40), ylim = c(-40, 40), cex.lab = 1.2
)
abline(v = 0, h = 0, col = 2, lty = 2)
polygon(c(-1.96, 1.96, 1.96, -1.96), c(-400, -400, 400, 400), col = "#AAAAAA50", border = NA)
abline(c(0, 1), col = "gray30", lty = 2)
points(coef.ols, coef.iv, cex = 1.2, pch = 16, col = "#77777750")
points(coef.ols, coef.iv, cex = 1.2, lwd = 0.7)
mtext(ylab, 2, 2.5)
mtext(xlab, 1, 3)
graphics.off()


######################
## Ratio histogram
######################

load("metadata/iv_replicate.rds")
names(d)

exper <- which(d$experimental == 1)
d$ratio <- d$iv_coef / (d$ols_coef)
d$ratio2 <- (d$iv_coef - d$ols_coef) / d$ols_coef
x0 <- c(0.3, 1, 3, 10, 30, 100, 300)

pdf("graphs/Figure5b.pdf")
xlab <- TeX(r'($|\hat{\tau}_{2SLS} / \hat{\tau}_{OLS}|$)')
par(mar = c(4, 4, 1, 1))
hist(log10(abs(d$ratio)),
  breaks = 30, main = "", ylim = c(0, 10), xlim = c(-0.6, 2.8),
  xlab = xlab, ylab = "#Papers", cex.lab = 1.2,
  axes = FALSE
)
box()
axis(2)
axis(1, at = log10(x0), labels = x0)
ratio.mean <- mean(abs(d$ratio))
ratio.median <- median(abs(d$ratio))
abline(v = log10(ratio.mean), lty = 2, col = 4, lwd = 2)
abline(v = log10(ratio.median), lty = 2, col = 2, lwd = 2)
text(2.2, 9.8, paste("    Mean =", round(ratio.mean, 1)), cex = 1.3)
text(2.2, 9.3, paste("Median = ", round(ratio.median, 1)), cex = 1.3)
graphics.off()



###################################
# Relationship with First Stage
###################################


load("metadata/iv_replicate.rds")
names(d)

d$weight <- 1
d$weight[which(d$multiple_cases == 1)] <- 0.5
names(d)
exper <- which(d$experimental == 1)
length(exper)
d$ratio <- d$iv_coef / (d$ols_coef)
d$ratio2 <- (d$iv_coef - d$ols_coef) / d$ols_coef


## Exp vs. Non-experimental
pdf("graphs/Figure5c.pdf")
xlab <- TeX(r'($|\hat{\rho}(d, \hat{d})|$)')
ylab <- TeX(r'($|\hat{\tau}_{2SLS} / \hat{\tau}_{OLS}|$)')
par(mar = c(4, 4, 1, 1))
ylim <- c(-0.5, 2.5)
exper1 <- which(d$experimental == 1)
exper0 <- which(d$experimental == 0)
plot(1, type = "n", xlim = c(-0.1, 1.1), ylim = ylim, axes = FALSE,
  xlab = "", ylab = "")
reg0 <- lm(log10(abs(d$ratio[exper0])) ~ abs(d[exper0, "rho"]), weights = d$weight[exper0])
f <- summary(reg0)$fstatistic
p0 <- pf(f[1], f[2], f[3], lower.tail = FALSE)
reg1 <- lm(log10(abs(d$ratio[exper1])) ~ abs(d[exper1, "rho"]), weights = d$weight[exper1])
f <- summary(reg1)$fstatistic
p1 <- pf(f[1], f[2], f[3], lower.tail = FALSE)
abline(reg1, col = 2, lwd = 1.5)
abline(reg0, col = "gray50", lwd = 2)
points(abs(d[exper0, "rho"]), log10(abs(d$ratio[exper0])), col = "#00000050", pch = 16, cex = 1.2)
points(abs(d[exper1, "rho"]), log10(abs(d$ratio[exper1])), col = "#FF000050", pch = 16, cex = 1.2)
points(abs(d[exper0, "rho"]), log10(abs(d$ratio[exper0])), cex = 1.2)
points(abs(d[exper1, "rho"]), log10(abs(d$ratio[exper1])), col = 2, cex = 1.2)
y0 <- c(0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300)
axis(1); box()
axis(2, at = log10(y0), labels = y0)
text(0.73, 2.45, paste0(
  "Observational:   R2 Adj. = ", sprintf("%.3f", summary(reg0)$adj.r.squared),
  ", p = ", sprintf("%.3f", p0)
), cex = 1.2)
text(0.73, 2.30, paste0(
  " Experimental:   R2 Adj. = ", sprintf("%.3f", summary(reg1)$adj.r.squared),
  ", p = ", sprintf("%.3f", p1)
), cex = 1.2, col = "#FF000080")
mtext(xlab, 1, 3)
mtext(ylab, 2, 2.5)
graphics.off()



# Among those report significant OLS results
d$weight <- 1
d$weight[which(d$multiple_cases == 1)] <- 0.5
d$ratio <- d$iv_coef / (d$ols_coef)
d$ratio2 <- (d$iv_coef - d$ols_coef) / d$ols_coef
sum(!is.na(d$ols_report_coef)) # 38
sum(!is.na(d$ols_report_coef) & sign(d$ols_report_coef) == sign(d$iv_report_coef)) # 36

condition <- !is.na(d$ols_report_coef) & abs(d$ols_report_coef / d$ols_report_se) >= 1.96 & d$ols_in_main == TRUE
pdf("graphs/Figure5d.pdf")
xlab <- TeX(r'($|\hat{\rho}(d, \hat{d})|$)')
ylab <- TeX(r'($|\hat{\tau}_{2SLS} / \hat{\tau}_{OLS}|$)')
par(mar = c(4, 4, 1, 1))
ylim <- c(-0.5, 2.5)
exper <- d$experimental == 1
plot(1,
  type = "n", xlim = c(-0.1, 1.1), ylim = ylim, axes = FALSE,
  xlab = "", ylab = "")
reg <- lm(log10(abs(d$ratio[condition])) ~ abs(d[condition, "rho"]), weights = d$weight[condition])
f <- summary(reg)$fstatistic
p <- pf(f[1], f[2], f[3], lower.tail = FALSE)
abline(reg, col = "gray50", lwd = 2)
points(abs(d[!condition, "rho"]), log10(abs(d$ratio[!condition])), col = "#99999970", pch = 16, cex = 1.3)
points(abs(d[condition & !exper, "rho"]), log10(abs(d$ratio[condition & !exper])), col = "#00000050", pch = 16, cex = 1.2)
points(abs(d[condition & exper, "rho"]), log10(abs(d$ratio[condition & exper])), col = "#FF000050", pch = 16, cex = 1.2)
points(abs(d[condition & !exper, "rho"]), log10(abs(d$ratio[condition & !exper])), col = 1, cex = 1.2)
points(abs(d[condition & exper, "rho"]), log10(abs(d$ratio[condition & exper])), col = 2, cex = 1.2)
y0 <- c(0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300)
axis(1); box()
axis(2, at = log10(y0), labels = y0)
text(0.88, 2.45, paste(
  "R2 Adj. =", sprintf("%.3f", summary(reg)$adj.r.squared),
  ", p =", sprintf("%.3f", p)
), cex = 1.2)
mtext(xlab, 1, 3)
mtext(ylab, 2, 2.5)
graphics.off()

######################################
# Weather/Geography/History IVs
######################################


condition <- d$iv_subtype %in% c("weather", "climate", "geography", "geography / diffusion", "history")
pdf("graphs/FigureA2.pdf")
par(mar = c(4,4,1,1))
ylim <- c(-0.5,2.5)
plot(1, type = "n", xlim = c(-0.1, 1.1), ylim = ylim, axes = FALSE,
     xlab = "", ylab = "")
reg <- lm(log10(abs(d$ratio[condition]))~abs(d[condition, "rho"]), weights = d$weight[condition])
f <- summary(reg)$fstatistic
p <- pf(f[1],f[2],f[3],lower.tail=FALSE)
abline(reg, col = "gray50", lwd = 2)
points(abs(d[!condition, "rho"]), log10(abs(d$ratio[!condition])), col = "#AAAAAA70", pch = 16, cex = 1.3)
points(abs(d[condition, "rho"]), log10(abs(d$ratio[condition])), col = "#000000", pch = 16, cex = 1.2)
y0 <- c(0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300)
axis(1); box()
axis(2, at = log10(y0), labels = y0)
text(0.8, 0.6, paste("R2 Adj. =",sprintf("%.3f",summary(reg)$adj.r.squared),
                       ", p =", sprintf("%.3f", p)), cex = 1.2)
legend("topleft", c("Weather/Geography/History", "Others"), col = c("#000000", "#AAAAAA70"), 
	pch = 16, cex = 1.2, bty = "n")			
mtext(xlab, 1, 3)
mtext(ylab, 2, 2.5)		   
graphics.off()